The goal of this analysis is to develop a regression to predict water temperature when data are missing or the time series is incomplete.
Currently this analysis is performed for Feather River and Yuba River. The resulting dataset with predicted values are saved and integrated in the development of a water temperature dataset.
Butte Creek is used to build the regression models because the time series is complete and the data are high quality.
Plot of mean temp for Feather River LFC and Butte Creek
## `geom_smooth()` using formula = 'y ~ x'
Plot of min temp for Feather River LFC and Butte Creek
## `geom_smooth()` using formula = 'y ~ x'
Plot of max temp for Feather River LFC and Butte Creek
## `geom_smooth()` using formula = 'y ~ x'
# LFC regression and predictions
# MEAN Regression
split <-rsample::initial_split(feather_lfc_regression_data_mean, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_lfc_mod_mean <- lm(temp ~ date + butte_temp, data = train)
summary(feather_lfc_mod_mean)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8395 -0.8432 0.0335 0.7871 5.9478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.446e+00 1.154e+00 -2.986 0.00287 **
## date 5.728e-04 6.144e-05 9.323 < 2e-16 ***
## butte_temp 4.137e-01 6.247e-03 66.215 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.382 on 1565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7431, Adjusted R-squared: 0.7427
## F-statistic: 2263 on 2 and 1565 DF, p-value: < 2.2e-16
test_predict <- predict(feather_lfc_mod_mean, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.08329577
# MIN Regression
split <-rsample::initial_split(feather_lfc_regression_data_min, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_lfc_mod_min <- lm(temp ~ date + butte_temp, data = train)
summary(feather_lfc_mod_min)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6302 -0.8263 0.0339 0.7729 6.2951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.265e+00 1.139e+00 -1.989 0.0469 *
## date 5.116e-04 6.064e-05 8.436 <2e-16 ***
## butte_temp 3.855e-01 6.613e-03 58.293 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.347 on 1565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6922, Adjusted R-squared: 0.6918
## F-statistic: 1760 on 2 and 1565 DF, p-value: < 2.2e-16
test_predict <- predict(feather_lfc_mod_min, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.08777431
# MAX Regression
split <-rsample::initial_split(feather_lfc_regression_data_max, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_lfc_mod_max <- lm(temp ~ date + butte_temp, data = train)
summary(feather_lfc_mod_max)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1963 -0.7564 -0.0088 0.7945 6.1494
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.414e+00 1.178e+00 -3.746 0.000186 ***
## date 6.159e-04 6.258e-05 9.842 < 2e-16 ***
## butte_temp 4.402e-01 5.806e-03 75.813 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.414 on 1565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7892, Adjusted R-squared: 0.7889
## F-statistic: 2929 on 2 and 1565 DF, p-value: < 2.2e-16
test_predict <- predict(feather_lfc_mod_max, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.08601363
# Predictions
feather_gap_predicted_mean_lfc <- predict(feather_lfc_mod_mean, feather_gap_mean)
feather_lfc_mean_predicted <- feather_gap_mean |>
mutate(value = feather_gap_predicted_mean_lfc,
statistic = "mean") |>
select(date, value, statistic)
ggplot(feather_lfc_mean_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_min_lfc <- predict(feather_lfc_mod_min, feather_gap_min)
feather_lfc_min_predicted <- feather_gap_min |>
mutate(value = feather_gap_predicted_min_lfc,
statistic = "min") |>
select(date, value, statistic)
ggplot(feather_lfc_min_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_max_lfc <- predict(feather_lfc_mod_max, feather_gap_max)
feather_lfc_max_predicted <- feather_gap_max |>
mutate(value = feather_gap_predicted_max_lfc,
statistic = "max") |>
select(date, value, statistic)
ggplot(feather_lfc_max_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_lfc <- bind_rows(feather_lfc_max_predicted,
feather_lfc_mean_predicted,
feather_lfc_min_predicted) |>
mutate(stream = "feather river",
site_group = "upper feather lfc") %>%
pivot_wider(id_cols = c(date, stream, site_group), values_from = "value", names_from = "statistic") |>
rename(mean_i = mean,
max_i = max,
min_i = min) |>
full_join(feather_lfc_format |>
pivot_wider(id_cols = c(stream, date, gage_agency, gage_number, site_group), values_from = "value", names_from = "statistic")) |>
mutate(gage_agency = ifelse(is.na(mean), "interpolated", gage_agency),
gage_number = ifelse(is.na(mean), "interpolated", gage_number),
mean = ifelse(is.na(mean), mean_i, mean),
min = ifelse(is.na(min), min_i, min),
max = ifelse(is.na(max), max_i, max)) |>
select(-c(min_i, mean_i, max_i)) |>
pivot_longer(cols = mean:min, values_to = "value", names_to = "statistic") |>
group_by(stream, date, statistic, gage_agency, gage_number, site_group) |> glimpse()
## Joining with `by = join_by(date, stream, site_group)`
## Rows: 26,367
## Columns: 7
## Groups: stream, date, statistic, gage_agency, gage_number, site_group [26,367]
## $ date <date> 1999-12-31, 1999-12-31, 1999-12-31, 2000-01-01, 2000-01-0…
## $ stream <chr> "feather river", "feather river", "feather river", "feathe…
## $ site_group <chr> "upper feather lfc", "upper feather lfc", "upper feather l…
## $ gage_agency <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ gage_number <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ statistic <chr> "mean", "max", "min", "mean", "max", "min", "mean", "max",…
## $ value <dbl> 4.814357, 4.446810, 5.189872, 4.883876, 4.887606, 4.997627…
ggplot() +
geom_line(data = feather_gap_lfc, aes(x = date, y = value, color = statistic)) +
theme_minimal()
## Warning: Removed 3 rows containing missing values (`geom_line()`).
Plot of mean temp for Feather River HFC and Butte Creek
# plot for mean
ggplot(data = feather_hfc_regression_data_mean, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of min temp for Feather River HFC and Butte Creek
# plot for mean
ggplot(data = feather_hfc_regression_data_min, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of max temp for Feather River HFC and Butte Creek
# plot for mean
ggplot(data = feather_hfc_regression_data_max, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# HFC regression and predictions
# MEAN Regression
split <-rsample::initial_split(feather_hfc_regression_data_mean, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_hfc_mod_mean <- lm(temp ~ date + butte_temp, data = train)
summary(feather_hfc_mod_mean)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.4405 -1.3154 -0.1361 1.4364 4.9478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.9770748 2.4046439 6.644 5.00e-11 ***
## date -0.0007853 0.0001889 -4.158 3.49e-05 ***
## butte_temp 0.6603000 0.0138146 47.797 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.218 on 1001 degrees of freedom
## Multiple R-squared: 0.6954, Adjusted R-squared: 0.6948
## F-statistic: 1143 on 2 and 1001 DF, p-value: < 2.2e-16
test_predict <- predict(feather_hfc_mod_mean, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] NA
# MIN Regression
split <-rsample::initial_split(feather_hfc_regression_data_min, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_hfc_mod_min <- lm(temp ~ date + butte_temp, data = train)
summary(feather_hfc_mod_min)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9841 -1.1224 0.0197 1.2451 4.3444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.0944603 2.1503179 7.485 1.57e-13 ***
## date -0.0008143 0.0001688 -4.824 1.62e-06 ***
## butte_temp 0.6780694 0.0135243 50.137 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.966 on 1000 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7158, Adjusted R-squared: 0.7152
## F-statistic: 1259 on 2 and 1000 DF, p-value: < 2.2e-16
test_predict <- predict(feather_hfc_mod_min, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.1172097
# MAX Regression
split <-rsample::initial_split(feather_hfc_regression_data_max, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_hfc_mod_max <- lm(temp ~ date + butte_temp, data = train)
summary(feather_hfc_mod_max)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.1371 -1.3613 -0.2248 1.7535 5.8303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.2663624 2.8496087 6.410 2.24e-10 ***
## date -0.0009096 0.0002240 -4.061 5.26e-05 ***
## butte_temp 0.6100411 0.0144402 42.246 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.615 on 1000 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.641, Adjusted R-squared: 0.6403
## F-statistic: 892.9 on 2 and 1000 DF, p-value: < 2.2e-16
test_predict <- predict(feather_hfc_mod_max, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.1330883
# Predictions
feather_gap_predicted_mean_hfc <- predict(feather_hfc_mod_mean, feather_gap_mean)
feather_hfc_mean_predicted <- feather_gap_mean |>
mutate(value = feather_gap_predicted_mean_hfc,
statistic = "mean") |>
select(date, value, statistic)
ggplot(feather_hfc_mean_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_min_hfc <- predict(feather_hfc_mod_min, feather_gap_min)
feather_hfc_min_predicted <- feather_gap_min |>
mutate(value = feather_gap_predicted_min_hfc,
statistic = "min") |>
select(date, value, statistic)
ggplot(feather_hfc_min_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_max_hfc <- predict(feather_hfc_mod_max, feather_gap_max)
feather_hfc_max_predicted <- feather_gap_max |>
mutate(value = feather_gap_predicted_max_hfc,
statistic = "max") |>
select(date, value, statistic)
ggplot(feather_hfc_max_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_hfc <- bind_rows(feather_hfc_max_predicted,
feather_hfc_mean_predicted,
feather_hfc_min_predicted) |>
mutate(stream = "feather river",
site_group = "upper feather hfc") %>%
pivot_wider(id_cols = c(date, stream, site_group), values_from = "value", names_from = "statistic") |>
rename(mean_i = mean,
max_i = max,
min_i = min) |>
full_join(feather_hfc_format |>
pivot_wider(id_cols = c(stream, date, gage_agency, gage_number, site_group), values_from = "value", names_from = "statistic")) |>
mutate(gage_agency = ifelse(is.na(mean), "interpolated", gage_agency),
gage_number = ifelse(is.na(mean), "interpolated", gage_number),
mean = ifelse(is.na(mean), mean_i, mean),
min = ifelse(is.na(min), min_i, min),
max = ifelse(is.na(max), max_i, max)) |>
select(-c(min_i, mean_i, max_i)) |>
pivot_longer(cols = mean:min, values_to = "value", names_to = "statistic") |>
group_by(stream, date, statistic, gage_agency, gage_number, site_group) |> glimpse()
## Joining with `by = join_by(date, stream, site_group)`
## Rows: 26,403
## Columns: 7
## Groups: stream, date, statistic, gage_agency, gage_number, site_group [26,403]
## $ date <date> 1999-12-31, 1999-12-31, 1999-12-31, 2000-01-01, 2000-01-0…
## $ stream <chr> "feather river", "feather river", "feather river", "feathe…
## $ site_group <chr> "upper feather hfc", "upper feather hfc", "upper feather h…
## $ gage_agency <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ gage_number <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ statistic <chr> "mean", "max", "min", "mean", "max", "min", "mean", "max",…
## $ value <dbl> 10.542370, 11.228641, 10.427630, 10.651635, 11.837773, 10.…
ggplot() +
geom_line(data = feather_gap_hfc, aes(x = date, y = value, color = statistic)) +
theme_minimal()
## Warning: Removed 3 rows containing missing values (`geom_line()`).
Pull in gage data from YR7 CDEC gage, however, this only contains data from 2020 onwards. Temperature data collected along with RST data was originally used to fill the gap prior to 2020; however, due to inconsistencies in these data sources the resulting predicted mean values were lower than the min values as the RST data only has mean data.
Prepare datasets for regression analysis (dataset with no missing data to train and dataset with missing data to predict)
Plot of mean temp for Yuba River and Butte Creek
# plot for mean
ggplot(data = yuba_regression_data_mean, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of min temp for Yuba River and Butte Creek
# plot for mean
ggplot(data = yuba_regression_data_min, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of max temp for Yuba River and Butte Creek
# plot for mean
ggplot(data = yuba_regression_data_max, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# MEAN Regression
split <-rsample::initial_split(yuba_regression_data_mean, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
yuba_mod_mean <- lm(temp ~ date + butte_temp, data = train)
summary(yuba_mod_mean)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9651 -1.0363 0.0472 1.0556 3.1283
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.199e+01 1.577e+00 13.94 <2e-16 ***
## date -8.313e-04 8.292e-05 -10.03 <2e-16 ***
## butte_temp 5.941e-01 6.825e-03 87.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.404 on 1297 degrees of freedom
## Multiple R-squared: 0.8569, Adjusted R-squared: 0.8566
## F-statistic: 3882 on 2 and 1297 DF, p-value: < 2.2e-16
test_predict <- predict(yuba_mod_mean, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.08520138
# MIN Regression
split <-rsample::initial_split(yuba_regression_data_min, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
yuba_mod_min <- lm(temp ~ date + butte_temp, data = train)
summary(yuba_mod_min)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9789 -0.9879 0.0217 1.0360 3.1786
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.298e+01 1.599e+00 14.37 <2e-16 ***
## date -8.884e-04 8.418e-05 -10.55 <2e-16 ***
## butte_temp 5.471e-01 7.510e-03 72.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.418 on 1297 degrees of freedom
## Multiple R-squared: 0.8074, Adjusted R-squared: 0.8071
## F-statistic: 2719 on 2 and 1297 DF, p-value: < 2.2e-16
test_predict <- predict(yuba_mod_min, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.1006144
# MAX Regression
split <-rsample::initial_split(yuba_regression_data_max, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
yuba_mod_max <- lm(temp ~ date + butte_temp, data = train)
summary(yuba_mod_max)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1868 -0.9594 0.0410 0.9962 6.3462
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.806e+01 1.571e+00 11.501 < 2e-16 ***
## date -6.222e-04 8.236e-05 -7.554 7.93e-14 ***
## butte_temp 6.390e-01 6.232e-03 102.528 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.408 on 1297 degrees of freedom
## Multiple R-squared: 0.8923, Adjusted R-squared: 0.8922
## F-statistic: 5375 on 2 and 1297 DF, p-value: < 2.2e-16
test_predict <- predict(yuba_mod_max, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.07569647
# Predictions
yuba_gap_predicted_mean <- predict(yuba_mod_mean, yuba_gap_mean)
yuba_mean_predicted <- yuba_gap_mean |>
mutate(value = yuba_gap_predicted_mean,
statistic = "mean_predicted") |>
select(date, value, statistic)
ggplot(yuba_mean_predicted, aes(x = date, y = value)) +
geom_line()
## Warning: Removed 1 row containing missing values (`geom_line()`).
yuba_gap_predicted_min <- predict(yuba_mod_min, yuba_gap_min)
yuba_min_predicted <- yuba_gap_min |>
mutate(value = yuba_gap_predicted_min,
statistic = "min_predicted") |>
select(date, value, statistic)
ggplot(yuba_min_predicted, aes(x = date, y = value)) +
geom_line()
## Warning: Removed 1 row containing missing values (`geom_line()`).
yuba_gap_predicted_max <- predict(yuba_mod_max, yuba_gap_max)
yuba_max_predicted <- yuba_gap_max |>
mutate(value = yuba_gap_predicted_max,
statistic = "max_predicted") |>
select(date, value, statistic)
ggplot(yuba_max_predicted, aes(x = date, y = value)) +
geom_line()
## Warning: Removed 1 row containing missing values (`geom_line()`).
yuba_gap <- bind_rows(yuba_max_predicted,
yuba_mean_predicted,
yuba_min_predicted) |>
mutate(stream = "yuba river") |>
pivot_wider(id_cols = c(date, stream), values_from = "value", names_from = "statistic") |>
full_join(yuba_format |>
pivot_wider(id_cols = c(stream, date, gage_agency, gage_number), values_from = "value", names_from = "statistic")) |>
mutate(gage_agency = ifelse(is.na(mean), "interpolated", gage_agency),
gage_number = ifelse(is.na(mean), "interpolated", gage_number),
mean = ifelse(is.na(mean), mean_predicted, mean),
min = ifelse(is.na(min), min_predicted, min),
max = ifelse(is.na(max), max_predicted, max)) |>
select(-c(mean_predicted, max_predicted, min_predicted)) |>
pivot_longer(cols = mean:min, values_to = "value", names_to = "statistic") |>
group_by(stream, date, statistic, gage_agency, gage_number) |> glimpse()
## Joining with `by = join_by(date, stream)`
## Rows: 26,367
## Columns: 6
## Groups: stream, date, statistic, gage_agency, gage_number [26,367]
## $ date <date> 1999-12-31, 1999-12-31, 1999-12-31, 2000-01-01, 2000-01-0…
## $ stream <chr> "yuba river", "yuba river", "yuba river", "yuba river", "y…
## $ gage_agency <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ gage_number <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ statistic <chr> "mean", "max", "min", "mean", "max", "min", "mean", "max",…
## $ value <dbl> 15.73326, 14.31347, 15.87375, 15.83144, 14.95185, 15.59932…
ggplot() +
geom_line(data = yuba_gap, aes(x = date, y = value, color = statistic)) +
theme_minimal()
## Warning: Removed 3 rows containing missing values (`geom_line()`).